library(tidyverse)
library(janitor)
library(broom)
library(readxl)
library(jsonlite)
library(gprofiler2)
# Fix the RNG seed (the analysis shuffles rows with sample_frac() before
# sorting) and use the black-and-white ggplot2 theme for all plots.
set.seed(666)
theme_set(theme_bw())
gvc_agora_opentargets
Setup environment
Read and prep data
GVC
Genes within 1Mb window of (each side of?) GVC loci from Fanny:
# Load the GVC gene table, normalise the column names, and split `gene_id`
# into the ID and its version suffix (separate() splits on non-alphanumeric
# characters by default); the version and two annotation columns are dropped.
gvc <- read_xlsx("GVC_1Mb_comparison_050224.xlsx") |>
  clean_names() |>
  separate(gene_id, into = c("gene_id", "version")) |>
  select(-c(version, agora_nominated_list, opentarget_info))
gvc
# One row per gene ID (first occurrence wins), sorted by gene symbol.
gvc.genes <- gvc |>
  select(gene_id, gene_symbol) |>
  distinct(gene_id, .keep_all = TRUE) |>
  arrange(gene_symbol)
gvc.genes
Agora
Alzheimer’s disease gene prioritization scores from Agora (see also related journal article):
# Agora overall gene scores, read from the JSON export into a tibble.
ago1 <- as_tibble(
  read_json(
    "agora.syn25741025.overall_scores.v12.2024-10-24.json",
    simplifyVector = TRUE
  )
)
ago1
Alzheimer’s disease genes (Agora nominated targets):
https://agora.adknowledgeportal.org/genes/nominated-targets
# Agora nominated-target gene list.
ago2 <- read_csv("agora.nominated-targets.gene-list.2024-10-24.csv")
ago2
# Keep only the scored genes whose HGNC symbol is on the nominated-target list.
ago <- filter(ago1, hgnc_symbol %in% ago2[["Gene Symbol"]])
OpenTargets
Alzheimer’s disease gene prioritization scores from OpenTargets:
# Earlier export (v24_03), kept for reference:
# ot <- read_tsv("OT-MONDO_0004975-associated-targets-6_4_2024-v24_03.tsv", show_col_types = FALSE, na = "No data")
# OpenTargets associated targets (v24_09 export); "No data" cells are read as NA.
ot <- read_tsv(
  "OT-MONDO_0004975-associated-targets-10_24_2024-v24_09.tsv",
  na = "No data",
  show_col_types = FALSE
)
ot
Add Ensembl Gene IDs (WTF!):
# Map OpenTargets gene symbols to Ensembl gene IDs with g:Convert and attach
# the original OpenTargets columns to every mapping.
# `mthreshold = Inf` keeps every ENSG hit returned for a symbol and
# `filter_na = TRUE` drops symbols without a hit, so `otensg` may hold several
# rows per symbol and omits unmapped symbols.
otcols <- colnames(ot)
otensg <- gconvert(
  query = ot$symbol,
  organism = "hsapiens",
  target = "ENSG",
  mthreshold = Inf,
  filter_na = TRUE
) %>%
  # `input_number` is matched against the row number of `ot`; cast it to
  # character because rownames_to_column() produces a character column.
  mutate(input_number = as.character(input_number)) %>%
  left_join(
    ot %>% rownames_to_column(var = "input_number"),
    by = "input_number",
    # Each conversion row must map back to exactly one OpenTargets row.
    relationship = "many-to-one"
  ) %>%
  # all_of() is the supported way to select with an external character vector;
  # passing the bare vector is deprecated in tidyselect.
  select(ensembl_gene_id = target, all_of(otcols))
otensg
Correlation of Agora and OpenTargets scores (GVC genes only)
# GVC genes annotated with Agora and OpenTargets scores (NA where a gene is
# absent from a resource).
d.cor <- gvc.genes |>
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) |>
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id))
# NOTE(review): 1347 rows here vs. the 1,344 GVC genes quoted in the Methods
# text suggests the joins duplicate a few genes; confirm this is intended.
nrow(d.cor)
[1] 1347
# Genetics scores: number of complete pairs, then Kendall rank correlation.
d.cor |> drop_na(genetics_score, otGeneticsPortal) |> nrow()
[1] 56
d.cor |>
  drop_na(genetics_score, otGeneticsPortal) |>
  summarize(tidy(cor.test(genetics_score, otGeneticsPortal, method = "kendall")))
nrow(d.cor)
[1] 1347
# Overall scores: number of complete pairs, then Kendall rank correlation.
d.cor |> drop_na(target_risk_score, globalScore) |> nrow()
[1] 75
d.cor |>
  drop_na(target_risk_score, globalScore) |>
  summarize(tidy(cor.test(target_risk_score, globalScore, method = "kendall")))
Correlation of Agora and OpenTargets scores (all genes)
# All Agora nominated genes annotated with OpenTargets scores.
d.cor <- left_join(ago, otensg, by = "ensembl_gene_id")
nrow(d.cor)
[1] 925
# Genetics scores: number of complete pairs, then Kendall rank correlation.
d.cor |> drop_na(genetics_score, otGeneticsPortal) |> nrow()
[1] 75
d.cor |>
  drop_na(genetics_score, otGeneticsPortal) |>
  summarize(tidy(cor.test(genetics_score, otGeneticsPortal, method = "kendall")))
nrow(d.cor)
[1] 925
# Overall scores: number of complete pairs, then Kendall rank correlation.
d.cor |> drop_na(target_risk_score, globalScore) |> nrow()
[1] 488
d.cor |>
  drop_na(target_risk_score, globalScore) |>
  summarize(tidy(cor.test(target_risk_score, globalScore, method = "kendall")))
Overlaps between GVC, Agora, and OpenTargets genes
# The three gene sets (gene IDs) compared in the Venn diagram.
x <- list(
  GVC = gvc.genes$gene_id,
  Agora = ago$ensembl_gene_id,
  OpenTargets = otensg$ensembl_gene_id
)
library(VennDiagram)
# filename = NULL makes venn.diagram() return the drawing instead of writing a
# file, so it is drawn on a fresh grid page here.
grid.newpage()
v <- venn.diagram(
  x,
  filename = NULL,
  fill = c("#FF0000", "#00FF00", "#0000FF")
)
grid.draw(v)
# Membership table for the diagram regions; the chunks below read its
# `..set..` and `..values..` columns.
p <- get.venn.partitions(x)
p
ORA of genes in overlaps
GVC ∩ Agora ∩ OpenTargets
# Genes shared by all three sets, annotated with Agora and OpenTargets scores
# and sorted by decreasing genetics_score, then otGeneticsPortal.
genes <- p |>
  filter(..set.. == "GVC∩Agora∩OpenTargets") |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) |>
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  select(
    gene_id, symbol, genetics_score, otGeneticsPortal, globalScore,
    target_risk_score, multi_omics_score
  ) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))
genes
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(genes$gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
# save overlap gene ids for later
overlap_gene_ids <- query
GVC ∩ Agora
# Genes in both GVC and Agora (with or without OpenTargets), annotated with
# scores and sorted by decreasing genetics_score, then otGeneticsPortal.
genes <- p |>
  filter(
    ..set.. %in% c("GVC∩Agora∩OpenTargets", "(GVC∩Agora)∖(OpenTargets)")
  ) |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) |>
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  select(
    gene_id, symbol, genetics_score, otGeneticsPortal, globalScore,
    target_risk_score, multi_omics_score
  ) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))
genes
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(genes$gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
GVC ∩ OpenTargets
# Genes in both GVC and OpenTargets (with or without Agora), annotated with
# scores and sorted by decreasing genetics_score, then otGeneticsPortal.
genes <- p |>
  filter(
    ..set.. %in% c("GVC∩Agora∩OpenTargets", "(GVC∩OpenTargets)∖(Agora)")
  ) |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) |>
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  select(
    gene_id, symbol, genetics_score, otGeneticsPortal, globalScore,
    target_risk_score, multi_omics_score
  ) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))
genes
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(genes$gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
Agora ∩ OpenTargets
# Genes in both Agora and OpenTargets (with or without GVC), annotated with
# scores and sorted by decreasing genetics_score, then otGeneticsPortal.
genes <- p |>
  filter(
    ..set.. %in% c("GVC∩Agora∩OpenTargets", "(Agora∩OpenTargets)∖(GVC)")
  ) |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) |>
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  select(
    gene_id, symbol, genetics_score, otGeneticsPortal, globalScore,
    target_risk_score, multi_omics_score
  ) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))
genes
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(genes$gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
(GVC ∩ Agora) ∪ (GVC ∩ OpenTargets) ∪ (Agora ∩ OpenTargets)
# Genes present in at least two of the three sets, annotated with scores and
# sorted by decreasing genetics_score, then otGeneticsPortal.
genes <- p |>
  filter(
    ..set.. %in% c(
      "GVC∩Agora∩OpenTargets",
      "(GVC∩Agora)∖(OpenTargets)",
      "(GVC∩OpenTargets)∖(Agora)",
      "(Agora∩OpenTargets)∖(GVC)"
    )
  ) |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) |>
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  select(
    gene_id, symbol, genetics_score, otGeneticsPortal, globalScore,
    target_risk_score, multi_omics_score
  ) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))
genes
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(genes$gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
(Agora ∩ OpenTargets) ∖ (GVC)
# Genes in Agora and OpenTargets but not in GVC, annotated with scores and
# sorted by decreasing genetics_score, then otGeneticsPortal.
genes <- p |>
  filter(..set.. == "(Agora∩OpenTargets)∖(GVC)") |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) |>
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  select(
    gene_id, symbol, genetics_score, otGeneticsPortal, globalScore,
    target_risk_score, multi_omics_score
  ) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))
genes
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(genes$gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
(GVC ∩ OpenTargets) ∖ (Agora)
# Genes in GVC and OpenTargets but not in Agora, annotated with scores and
# sorted by decreasing genetics_score, then otGeneticsPortal.
genes <- p |>
  filter(..set.. == "(GVC∩OpenTargets)∖(Agora)") |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) |>
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  select(
    gene_id, symbol, genetics_score, otGeneticsPortal, globalScore,
    target_risk_score, multi_omics_score
  ) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))
genes
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(genes$gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
(OpenTargets) ∖ (GVC ∪ Agora)
# Genes found only in OpenTargets, annotated with scores and sorted by
# decreasing genetics_score, then otGeneticsPortal.
genes <- p |>
  filter(..set.. == "(OpenTargets)∖(GVC∪Agora)") |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) |>
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  select(
    gene_id, symbol, genetics_score, otGeneticsPortal, globalScore,
    target_risk_score, multi_omics_score
  ) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))
genes
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(genes$gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
(GVC ∩ Agora) ∖ (OpenTargets)
# Genes in GVC and Agora but not in OpenTargets, annotated with scores and
# sorted by decreasing genetics_score, then otGeneticsPortal.
genes <- p |>
  filter(..set.. == "(GVC∩Agora)∖(OpenTargets)") |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) |>
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  select(
    gene_id, symbol, genetics_score, otGeneticsPortal, globalScore,
    target_risk_score, multi_omics_score
  ) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))
genes
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(genes$gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
(Agora) ∖ (GVC ∪ OpenTargets)
# Genes found only in Agora, annotated with scores and sorted by decreasing
# genetics_score, then otGeneticsPortal.
genes <- p |>
  filter(..set.. == "(Agora)∖(GVC∪OpenTargets)") |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) |>
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  select(
    gene_id, symbol, genetics_score, otGeneticsPortal, globalScore,
    target_risk_score, multi_omics_score
  ) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))
genes
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(genes$gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
(GVC) ∖ (Agora ∪ OpenTargets)
# Genes found only in GVC, annotated with scores and sorted by decreasing
# genetics_score, then otGeneticsPortal.
genes <- p |>
  filter(..set.. == "(GVC)∖(Agora∪OpenTargets)") |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) |>
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  select(
    gene_id, symbol, genetics_score, otGeneticsPortal, globalScore,
    target_risk_score, multi_omics_score
  ) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal))
genes
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(genes$gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
ORA of GVC genes sorted by Agora or OpenTargets scores
GVC genes sorted by Agora’s genetics_score
Arrange by Agora’s genetics_score and OpenTargets’ otGeneticsPortal:
# GVC genes with Agora and OpenTargets annotations, sorted by decreasing
# genetics_score with otGeneticsPortal as the secondary key.
d1 <- gvc.genes |>
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) |>
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  arrange(desc(genetics_score), desc(otGeneticsPortal)) |>
  select(-c(symbol, hgnc_symbol)) |>
  relocate(gene_id, gene_symbol, genetics_score, otGeneticsPortal)
d1
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(d1$gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
GVC genes sorted by OpenTargets’ otGeneticsPortal
Arrange by OpenTargets’ otGeneticsPortal and Agora’s genetics_score:
# GVC genes with Agora and OpenTargets annotations, sorted by decreasing
# otGeneticsPortal with genetics_score as the secondary key.
d2 <- gvc.genes |>
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) |>
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  arrange(desc(otGeneticsPortal), desc(genetics_score)) |>
  select(-c(symbol, hgnc_symbol)) |>
  relocate(gene_id, gene_symbol, otGeneticsPortal, genetics_score)
d2
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(d2$gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
GVC genes sorted by Agora’s target_risk_score
Arrange by Agora’s target_risk_score and OpenTargets’ globalScore:
# GVC genes with Agora and OpenTargets annotations, sorted by decreasing
# target_risk_score with globalScore as the secondary key.
d3 <- gvc.genes |>
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) |>
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  arrange(desc(target_risk_score), desc(globalScore)) |>
  select(-c(symbol, hgnc_symbol)) |>
  relocate(gene_id, gene_symbol, target_risk_score, globalScore)
d3
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(d3$gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
GVC genes sorted by OpenTargets’ globalScore
Arrange by OpenTargets’ globalScore and Agora’s target_risk_score:
# GVC genes with Agora and OpenTargets annotations, sorted by decreasing
# globalScore with target_risk_score as the secondary key.
d4 <- gvc.genes |>
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) |>
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  arrange(desc(globalScore), desc(target_risk_score)) |>
  select(-c(symbol, hgnc_symbol)) |>
  relocate(gene_id, gene_symbol, globalScore, target_risk_score)
d4
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(d4$gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
GVC loci annotated with genes in overlaps
GVC ∩ Agora ∩ OpenTargets
# Distinct gene IDs in the three-way intersection of the Venn partitions.
# The joins to `ago` and `otensg` were dropped: a left join keeps every left
# row and distinct() then collapses any duplicates it adds, so they could not
# change the resulting IDs or their order.
gene_ids <- p |>
  filter(..set.. == "GVC∩Agora∩OpenTargets") |>
  unnest(..values..) |>
  select(gene_id = ..values..) |>
  distinct(gene_id) |>
  pull(gene_id)
length(gene_ids)
[1] 75
# For each GVC locus, list the symbols of its genes that fall in the three-way
# overlap (`gene_ids`), joined with " | ", and render the result with gt.
gvc |>
  filter(gene_id %in% gene_ids) |>
  arrange(gene_symbol) |>
  select(gvc_locus = grouped_loci_gvc, gene_id, gene_symbol) |>
  mutate(gene = gene_symbol) |>
  # unite(gene, gene_id, gene_symbol, sep = ":", remove = FALSE) |>
  distinct(gvc_locus, gene, .keep_all = TRUE) |>
  group_by(gvc_locus) |>
  # summarize() returns only gvc_locus and genes, so no further select() is
  # needed before building the table.
  summarize(genes = str_c(gene, collapse = " | ")) |>
  gt::gt()
| gvc_locus | genes |
|---|---|
| ABCA7 | ABCA7 | NDUFS7 |
| ABI3 / ACE | NGFR | ZNF652 |
| ACE | ACE |
| ADAM10 / MINDY2 | ADAM10 | ALDH1A2 | LIPC |
| ADAMTS4 | ADAMTS4 | FCER1G | NDUFS2 |
| ANK3 / CCDC6 | CCDC6 | SLC16A9 |
| ANKRD31 | ANKRD31 | ENC1 |
| APH1B | LACTB |
| APOE / TOMM40 | APOC1 | APOE | BCAM | MARK4 | NECTIN2 |
| APP | MRPL39 |
| APP / ADAMTS1 | ADAMTS1 |
| BCKDK / KAT8 / VKORC1 | BCKDK | STX4 | VKORC1 |
| BIN1 | BIN1 |
| CASS4 | CASS4 |
| CD2AP | CD2AP |
| CD33 | CD33 |
| CHRNE | ENO3 | RABEP1 | SLC25A11 | ZFP3 |
| CLU / PTK2B | CLU | EPHX2 | PTK2B | SCARA3 |
| CR1 | CR1 |
| CTSH | CTSH |
| DOC2A | DOC2A |
| ECHDC3 / USP6NL | USP6NL |
| EED / PICALM | DLG2 | PICALM |
| EPHA1 / EPHA1-AS1 | EPHA1 |
| HAVCR2 | CYFIP2 | HAVCR2 |
| HLA | HLA-DRA | HLA-DRB1 |
| ICA1 | NXPH1 |
| IDUA | CPLX1 |
| IL34 | MTSS2 |
| INPP5D | INPP5D |
| LILRB2 / TMC4 | LAIR1 |
| MADD / SPI1 | C1QTNF4 | NDUFS3 | NR1H3 | RAPSN | SPI1 |
| MS4A / MS4A2 / MS4A4A / MS4A6A | MRPL16 | MS4A2 | MS4A4A | MS4A6A |
| NDUFAF7 / PRKD3 | QPCT |
| NYAP1 / PILRA / SPDYE3 / ZCWPW1 | NYAP1 |
| OARD1 / TREM2 / TREML2 / UNC5CL | TREM2 |
| PLCG2 | PLCG2 | SDR42E1 |
| PLEKHA1 | HTRA1 |
| RABEP1 / SCIMP | ENO3 | RABEP1 | SLC25A11 | ZFP3 |
| RASGEF1C | MAPK9 |
| RIN3 / SLC24A4 | RIN3 | SLC24A4 |
| SHARPIN | PLEC |
| SIGLEC11 | NR1H2 |
| WNT3 | NSF |
ORA of Agora and OpenTargets genes sorted by global or genetic score
Agora genes sorted by genetics_score
# Agora genes that have a genetics_score, highest score first.
d5 <- ago |>
  drop_na(genetics_score) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  arrange(desc(genetics_score))
d5
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(d5$ensembl_gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
Agora genes sorted by target_risk_score
# Agora genes that have a target_risk_score, highest score first.
d6 <- ago |>
  drop_na(target_risk_score) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  arrange(desc(target_risk_score))
d6
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(d6$ensembl_gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
Agora genes sorted by multi_omics_score
# Agora genes that have a multi_omics_score, highest score first.
d7 <- ago |>
  drop_na(multi_omics_score) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  arrange(desc(multi_omics_score))
d7
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(d7$ensembl_gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
OpenTargets genes sorted by otGeneticsPortal
# OpenTargets genes that have an otGeneticsPortal score, highest score first.
d8 <- otensg |>
  drop_na(otGeneticsPortal) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  arrange(desc(otGeneticsPortal))
d8
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(d8$ensembl_gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
OpenTargets genes sorted by globalScore
# OpenTargets genes that have a globalScore, highest score first.
d9 <- otensg |>
  drop_na(globalScore) |>
  # randomize row order before arranging
  sample_frac(1L) |>
  arrange(desc(globalScore))
d9
# Unique gene IDs in the sorted order above form the ordered g:Profiler query.
query <- unique(d9$ensembl_gene_id)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = TRUE,
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
ORA of GVC genes
# Gene list from the "PG Gene List" sheet of the GVC Table 1C working copy
# (the first spreadsheet row is skipped).
d10 <- read_xlsx(
  "2024-08-29_GVC Table 1C - WORKING COPY.xlsx",
  sheet = "PG Gene List",
  skip = 1
)
arrange(d10, `PG RANK`)
# Query genes in rank order, with APOE added at rank 0 so it sorts first;
# repeated gene names are collapsed to their first occurrence.
query <- d10 |>
  rename(
    gene = `GVC expanded list of possible genes (500kb)`,
    rank = `PG RANK`
  ) |>
  bind_rows(tibble(gene = "APOE", rank = 0)) |>
  arrange(rank) |>
  distinct(gene) |>
  pull(gene)
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = FALSE, # <- UNORDERED QUERY!
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)
Check missing OpenTargets scores in Brian’s table
# Compare the OpenTargets scores recorded in the GVC Table 1C working copy
# with the OpenTargets export loaded in `ot`, matching genes by symbol.
t <- read_xlsx(
  "8-23-2024 - GVC Table 1C - WORKING COPYL_MRC.xlsx",
  skip = 1,
  na = "No data"
) |>
  janitor::clean_names() |>
  select(
    symbol = gvc_expanded_list_of_possible_genes_500kb,
    open_target_scores_global,
    open_target_scores_genetics
  )
# Global score: rows where the two values differ at 4 decimals, or where the
# score is missing on exactly one side. A bare `!=` yields NA when either
# value is NA and filter() drops those rows, which hid the missing scores
# this check is meant to find.
t |>
  left_join(ot, by = "symbol") |>
  filter(
    coalesce(
      round(open_target_scores_global, 4) != round(globalScore, 4),
      xor(is.na(open_target_scores_global), is.na(globalScore))
    )
  ) |>
  select(symbol, open_target_scores_global, globalScore)
# Genetics score: same comparison, including one-sided missing values.
t |>
  left_join(ot, by = "symbol") |>
  filter(
    coalesce(
      round(open_target_scores_genetics, 4) != round(otGeneticsPortal, 4),
      xor(is.na(open_target_scores_genetics), is.na(otGeneticsPortal))
    )
  ) |>
  select(symbol, open_target_scores_genetics, otGeneticsPortal)
Methods
Phase 2. Pathway analysis of GVC, Agora and OpenTargets candidate AD genes. We conducted gene set over-representation analysis (ORA) of GVC, Agora and OpenTargets candidate AD gene lists using R[37] with the gprofiler2 package (see https://github.com/marcoralab/gvc_agora_opentargets), excluding electronic Gene Ontology (GO) annotations, and filtering results using a p-value significance threshold of 0.005 after multiple testing correction with the g:SCS algorithm.
We used the GVC gene list of 1,344 genes in the proximity of AD risk loci that we built as described above. We retrieved Agora’s gene list of Alzheimer’s disease nominated targets (site version 3.4.0; data version syn13363290-v68) from https://agora.adknowledgeportal.org/genes/nominated-targets and Agora’s gene scores (data version syn25741025-v12) from https://www.synapse.org/Synapse:syn25741025 on October 24th, 2024. We retrieved OpenTargets’ gene list of targets associated with Alzheimer’s disease (EFO:MONDO_0004975; data version v24_09) from https://platform.opentargets.org/disease/MONDO_0004975/associations on October 24th, 2024.
We conducted ORA using the following candidate AD gene lists: 1) Agora’s gene list of Alzheimer’s disease nominated targets, ordered by decreasing Agora genetics, multi-omics, or target risk score; 2) OpenTargets’ gene list of Alzheimer’s disease associated targets, ordered by decreasing OpenTargets genetics portal or global score; 3) the GVC gene list, ordered by decreasing values of the aforementioned Agora or OpenTargets scores; 4) lists corresponding to the regions of the Venn diagram of gene lists 1-3, built using R[37] with the VennDiagram package (see https://github.com/marcoralab/gvc_agora_opentargets) and ordered by decreasing values of the aforementioned Agora or OpenTargets scores.
Print environment
# Print the R version, platform and package versions used for this run.
sessioninfo::session_info()
─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.4.1 (2024-06-14)
os macOS Sonoma 14.7
system aarch64, darwin20
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/New_York
date 2024-10-24
pandoc 3.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/ (via rmarkdown)
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
backports 1.5.0 2024-05-23 [1] CRAN (R 4.4.0)
bit 4.5.0 2024-09-20 [1] CRAN (R 4.4.1)
bit64 4.5.2 2024-09-22 [1] CRAN (R 4.4.1)
bitops 1.0-9 2024-10-03 [1] CRAN (R 4.4.1)
broom * 1.0.7 2024-09-26 [1] CRAN (R 4.4.1)
cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.4.0)
cli 3.6.3 2024-06-21 [1] CRAN (R 4.4.0)
colorspace 2.1-1 2024-07-26 [1] CRAN (R 4.4.0)
crayon 1.5.3 2024-06-20 [1] CRAN (R 4.4.0)
crosstalk 1.2.1 2023-11-23 [1] CRAN (R 4.4.0)
data.table 1.16.2 2024-10-10 [1] CRAN (R 4.4.1)
digest 0.6.37 2024-08-19 [1] CRAN (R 4.4.1)
dplyr * 1.1.4 2023-11-17 [1] CRAN (R 4.4.0)
evaluate 1.0.1 2024-10-10 [1] CRAN (R 4.4.1)
fansi 1.0.6 2023-12-08 [1] CRAN (R 4.4.0)
fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.4.0)
forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.4.0)
formatR 1.14 2023-01-17 [1] CRAN (R 4.4.0)
futile.logger * 1.4.3 2016-07-10 [1] CRAN (R 4.4.0)
futile.options 1.0.1 2018-04-20 [1] CRAN (R 4.4.0)
generics 0.1.3 2022-07-05 [1] CRAN (R 4.4.0)
ggplot2 * 3.5.1 2024-04-23 [1] CRAN (R 4.4.0)
glue 1.8.0 2024-09-30 [1] CRAN (R 4.4.1)
gprofiler2 * 0.2.3 2024-02-23 [1] CRAN (R 4.4.0)
gt 0.11.1 2024-10-04 [1] CRAN (R 4.4.1)
gtable 0.3.5 2024-04-22 [1] CRAN (R 4.4.0)
hms 1.1.3 2023-03-21 [1] CRAN (R 4.4.0)
htmltools 0.5.8.1 2024-04-04 [1] CRAN (R 4.4.0)
htmlwidgets 1.6.4 2023-12-06 [1] CRAN (R 4.4.0)
httpuv 1.6.15 2024-03-26 [1] CRAN (R 4.4.0)
httr 1.4.7 2023-08-15 [1] CRAN (R 4.4.0)
janitor * 2.2.0 2023-02-02 [1] CRAN (R 4.4.0)
jsonlite * 1.8.9 2024-09-20 [1] CRAN (R 4.4.1)
knitr 1.48 2024-07-07 [1] CRAN (R 4.4.0)
labeling 0.4.3 2023-08-29 [1] CRAN (R 4.4.0)
lambda.r 1.2.4 2019-09-18 [1] CRAN (R 4.4.0)
later 1.3.2 2023-12-06 [1] CRAN (R 4.4.0)
lazyeval 0.2.2 2019-03-15 [1] CRAN (R 4.4.0)
lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.4.0)
lubridate * 1.9.3 2023-09-27 [1] CRAN (R 4.4.0)
magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.4.0)
mime 0.12 2021-09-28 [1] CRAN (R 4.4.0)
munsell 0.5.1 2024-04-01 [1] CRAN (R 4.4.0)
pillar 1.9.0 2023-03-22 [1] CRAN (R 4.4.0)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.4.0)
plotly 4.10.4 2024-01-13 [1] CRAN (R 4.4.0)
promises 1.3.0 2024-04-05 [1] CRAN (R 4.4.0)
purrr * 1.0.2 2023-08-10 [1] CRAN (R 4.4.0)
R6 2.5.1 2021-08-19 [1] CRAN (R 4.4.0)
Rcpp 1.0.13 2024-07-17 [1] CRAN (R 4.4.0)
RCurl 1.98-1.16 2024-07-11 [1] CRAN (R 4.4.0)
readr * 2.1.5 2024-01-10 [1] CRAN (R 4.4.0)
readxl * 1.4.3 2023-07-06 [1] CRAN (R 4.4.0)
rlang 1.1.4 2024-06-04 [1] CRAN (R 4.4.0)
rmarkdown 2.28 2024-08-17 [1] CRAN (R 4.4.0)
rstudioapi 0.17.1 2024-10-22 [1] CRAN (R 4.4.1)
sass 0.4.9 2024-03-15 [1] CRAN (R 4.4.0)
scales 1.3.0 2023-11-28 [1] CRAN (R 4.4.0)
sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.4.0)
shiny 1.9.1 2024-08-01 [1] CRAN (R 4.4.0)
snakecase 0.11.1 2023-08-27 [1] CRAN (R 4.4.0)
stringi 1.8.4 2024-05-06 [1] CRAN (R 4.4.0)
stringr * 1.5.1 2023-11-14 [1] CRAN (R 4.4.0)
tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.4.0)
tidyr * 1.3.1 2024-01-24 [1] CRAN (R 4.4.0)
tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.4.0)
tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.4.0)
timechange 0.3.0 2024-01-18 [1] CRAN (R 4.4.0)
tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.4.0)
utf8 1.2.4 2023-10-22 [1] CRAN (R 4.4.0)
vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.4.0)
VennDiagram * 1.7.3 2022-04-12 [1] CRAN (R 4.4.0)
viridisLite 0.4.2 2023-05-02 [1] CRAN (R 4.4.0)
vroom 1.6.5 2023-12-05 [1] CRAN (R 4.4.0)
withr 3.0.1 2024-07-31 [1] CRAN (R 4.4.0)
xfun 0.48 2024-10-03 [1] CRAN (R 4.4.1)
xml2 1.3.6 2023-12-04 [1] CRAN (R 4.4.0)
xtable 1.8-4 2019-04-21 [1] CRAN (R 4.4.0)
yaml 2.3.10 2024-07-26 [1] CRAN (R 4.4.0)
[1] /Users/marcoe02/.Rlib
[2] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library
──────────────────────────────────────────────────────────────────────────────